##############################################################
##############################################################
### Replication Materials for
### Stefan Müller and Liam Kneafsey:
### Evidence for the Irrelevance of Irrelevant Events
### Political Science Research and Methods
###
### Please get in touch with the authors if you have any questions: 
### stefan.mueller@ucd.ie
### Note: 0000_readme.pdf contains 
### instructions and details about each script and dataset
##############################################################
##############################################################

## load packages

library(haven)   # CRAN v2.3.1
library(ggplot2) # CRAN v3.3.3
library(dplyr)   # CRAN v1.0.5
library(car)     # CRAN v3.0-10
library(scales)  # CRAN v1.1.1
library(forcats) # CRAN v0.5.1
library(tidyr)   # CRAN v1.1.3


## The commented-out code below shows how we subsetted the 2014 EES raw data,
## which can be downloaded after a free registration at:
## https://doi.org/10.4232/1.12628
## dat_raw <- haven::read_dta("data/ZA5160_v4-0-0.dta")
## 
## dat <- select(dat_raw, starts_with("qpp23_"),
##               countrycode)
## saveRDS(dat, "data/data_ess_2014_knowledge.rds")

## load the subsetted 2014 EES knowledge items
dat <- readRDS("data_ess_2014_knowledge.rds")

## NOTE(review): presumably defines/sets the ggplot2 theme used by the
## figures below -- confirm in function_theme_base.R
source("function_theme_base.R")

## recode knowledge variables: answers coded 1 stay 1 ("correct"), 
## answers coded 2 or -9 (incorrect/missing) become 0
recode_correct <- "-9=0;1=1;2=0"

dat_knowledge <- dat %>% 
    mutate(know_ch = car::recode(qpp23_1, recode_correct),
           know_ep = car::recode(qpp23_2, recode_correct),
           know_natparl = car::recode(qpp23_3, recode_correct),
           know_pm = car::recode(qpp23_4, recode_correct)) %>% 
    ## additive knowledge scale (sum of the four 0/1 items).
    ## `+` has no `na.rm` argument: the former `na.rm = TRUE` inside mutate()
    ## did not influence the sum, it only created a spurious constant column
    ## named `na.rm`, so it has been removed.
    mutate(know_scale = know_ch + know_ep + know_natparl + know_pm)


## recode specification (car::recode syntax) mapping the values of 
## `countrycode` (as character) to country names
recode_countries <- '"1040"="Austria";
"1056"="Belgium";
"1100"="Bulgaria";
"1191"="Croatia";
"1196"="Cyprus";
"1203"="Czech Republic";
"1208"="Denmark";
"1233"="Estonia";
"1246"="Finland";
"1250"="France";
"1276"="Germany";
"1300"="Greece";
"1348"="Hungary";
"1372"="Ireland";
"1380"="Italy";
"1428"="Latvia";
"1440"="Lithuania";
"1442"="Luxembourg";
"1470"="Malta";
"1528"="The Netherlands";
"1616"="Poland";
"1620"="Portugal";
"1642"="Romania";
"1703"="Slovakia";
"1705"="Slovenia";
"1724"="Spain";
"1752"="Sweden";
"1826"="United Kingdom"'

## add the country name as a new column `country`
dat_knowledge <- mutate(
    dat_knowledge,
    country = car::recode(as.character(countrycode), recode_countries)
)


## total number of respondents
nrow(dat_knowledge)

## number of distinct countries
dplyr::n_distinct(dat_knowledge$country)

## respondents per country, sorted from largest to smallest sample
n_respondents <- dat_knowledge %>% 
    count(country) %>% 
    arrange(desc(n))

## average number of respondents per country
mean(n_respondents$n)

## stack the four knowledge items into long format (one row per respondent 
## and question) and separate Irish respondents from everyone else
dat_knowledge_long <- dat_knowledge %>% 
    select(know_ch:know_pm, country) %>% 
    gather(key = question, value = value, -country) %>% 
    mutate(ireland_dummy = dplyr::if_else(country == "Ireland", 
                                          "Ireland", 
                                          "Other EU member states")) 


## get averages for Irish respondents/all other respondents per question,
## with standard errors and normal-approximation confidence intervals
## (z = 1.96 for 95%, z = 1.645 for 90%)
dat_knowledge_questions <- dat_knowledge_long %>%   
    group_by(question, ireland_dummy) %>% 
    summarise(mean = mean(value),
              se = sd(value) / sqrt(n()),
              ci_lower_95 = mean - 1.96 * se,
              ci_upper_95 = mean + 1.96 * se,
              ci_lower_90 = mean - 1.645 * se,
              ci_upper_90 = mean + 1.645 * se,
              ## with two grouping variables summarise() would otherwise 
              ## return a tibble still grouped by `question` (and print a 
              ## message about it); drop the grouping explicitly
              .groups = "drop")

## number of summary rows per question
table(dat_knowledge_questions$question)

## recode specification (car::recode syntax) with the printed label of 
## each knowledge item
recode_questions <- "
                      'know_ch'='Switzerland member of EU';
                      'know_ep'='Allocation of seats in the European Parliament';
                      'know_natparl'='Size of national legislature';
                      'know_pm'='Party of current Prime Minister'"

## add the item label, the rounded percentage used as text label in the 
## plot, and the domain (EU vs. domestic politics) of each item
dat_knowledge_questions <- dat_knowledge_questions %>% 
    mutate(
        question_detail = car::recode(question, recode_questions),
        percentage_print = paste0(round(mean * 100, 0), "%"),
        question_type = ifelse(question %in% c("know_ch", "know_ep"),
                               "EU politics:", "Domestic politics:")
    )

## fix the order of the domains: EU politics first, domestic politics second
dat_knowledge_questions[["question_type"]] <- factor(
    dat_knowledge_questions[["question_type"]],
    levels = c("EU politics:", "Domestic politics:")
)
## Figure A31: share of correct answers per knowledge item, Irish respondents
## vs. all other respondents (thin line: 95% CI, thick line: 90% CI)
ggplot(data = dat_knowledge_questions, 
       mapping = aes(x = forcats::fct_rev(ireland_dummy), 
                     y = mean,
                     colour = ireland_dummy)) +
    geom_point() +
    geom_linerange(aes(ymin = ci_lower_95, ymax = ci_upper_95),
                   size = 0.5) +
    geom_text(aes(label = percentage_print), nudge_x = 0.3) +
    geom_linerange(aes(ymin = ci_lower_90, ymax = ci_upper_90),
                   size = 1.3) +
    facet_wrap(question_type ~ question_detail, 
               nrow = 4, scales = "free_x") +
    coord_flip() +
    scale_colour_manual(values = c("black", "grey50")) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1),
                       limits = c(0, 1)) +
    labs(x = NULL, 
         y = "Correct answers to question") +
    theme(legend.position = "none")
ggsave(filename = "fig_a31.pdf", width = 9, height = 8)



## cross-tabulation: number of observations per question and country
table(dat_knowledge_long[["question"]],
      dat_knowledge_long[["country"]])

## cross-tabulation: distribution of the knowledge scale within each country
table(dat_knowledge[["know_scale"]],
      dat_knowledge[["country"]])


## calculate the average number of correctly answered questions by country
## (with standard errors, 95% and 90% confidence intervals) and mark Ireland
## so that it can be highlighted in the plot
dat_knowledge_scale_sum <- dat_knowledge %>% 
    group_by(country) %>% 
    summarise(mean = mean(know_scale),
              se = sd(know_scale) / sqrt(n()),
              ci_lower_95 = mean - 1.96 * se,
              ci_upper_95 = mean + 1.96 * se,
              ci_lower_90 = mean - 1.645 * se,
              ci_upper_90 = mean + 1.645 * se) %>% 
    mutate(ireland_dummy = country == "Ireland")


## Figure A30: average knowledge score by country, ordered by the mean; 
## Ireland is drawn in black, all other countries in grey 
## (thin line: 95% CI, thick line: 90% CI)
ggplot(data = dat_knowledge_scale_sum, 
       mapping = aes(x = reorder(country, mean), 
                     y = mean,
                     colour = ireland_dummy)) +
    geom_point() +
    ## country names are printed next to the points instead of on the axis
    geom_text(aes(label = country, y = mean),
              hjust = 1, nudge_y = -0.07) +
    geom_linerange(aes(ymin = ci_lower_95, ymax = ci_upper_95),
                   size = 0.5) +
    geom_linerange(aes(ymin = ci_lower_90, ymax = ci_upper_90),
                   size = 1.3) +
    coord_flip() +
    scale_colour_manual(values = c("grey50", "black")) +
    scale_y_continuous(breaks = seq(0, 2, by = 0.5),
                       limits = c(0, 2)) +
    labs(x = NULL,
         y = "Number of questions answered correctly (out of 4)") +
    theme(axis.text.y = element_blank(),
          axis.ticks.y = element_blank(),
          legend.position = "none")
ggsave(filename = "fig_a30.pdf", width = 9, height = 6)



## download RAW INES data at https://www.ucd.ie/issda/data/irishnationalelectionstudy/
## Here, we selected only the relevant variables, 
## and saved the subsetted dataset
## ines_raw <- foreign::read.dta("data_raw/INESLong_Beta.dta")
## ines_subset <- select(ines_raw, id,
##                       c(v0001, v0870,
##                         v0871, v0872,
##                         v0873, ines))
## saveRDS(ines_subset, "data_ines_subset.rds")

## load the subsetted INES dataset
ines_subset <- readRDS("data_ines_subset.rds")

## keep the 2002 wave; `year` and `contact` are copies of `ines` and `v0870`
ines_2002 <- ines_subset %>% 
    mutate(year = ines,
           contact = v0870) %>% 
    filter(year == 2002)

## distribution of the contact item (v0870) in 2002
table(ines_2002$v0870)


## flag respondents for whom any of the three items v0871-v0873 equals 
## "td" or "councillors": 1 if mentioned, NA otherwise
ines_2002 <- ines_2002 %>% 
    mutate(
        contact_td = case_when(
            v0871 == "td" | v0872 == "td" | v0873 == "td" ~ 1
        ),
        contact_councillor = case_when(
            v0871 == "councillors" | 
                v0872 == "councillors" | 
                v0873 == "councillors" ~ 1
        )
    )

## stack the contact indicators into long format (one row per respondent and
## type of politician); respondents without a mention (NA) are coded 0
ines_2002_contact <- ines_2002 %>% 
    select(starts_with("contact_")) %>% 
    gather(type, yes) %>% 
    mutate(yes = ifelse(is.na(yes), 0, yes)) %>% 
    ## strip the "contact_" prefix with base R: the previous call to 
    ## str_remove_all() requires stringr, which this script never loads
    mutate(type_plot = gsub("contact_", "", type, fixed = TRUE)) %>% 
    ## labels printed in the plot
    mutate(type_plot = car::recode(type_plot, "'td'='TD';
                                 'councillor'='Local councilor'")) 

## share of respondents who contacted each type of politician, with 
## standard errors and 95%/90% confidence intervals
dat_sum_contact <- ines_2002_contact %>%
    group_by(type_plot) %>%
    summarise(mean = mean(yes),
              se = sd(yes) / sqrt(n()),
              ci_lower_95 = mean - 1.96 * se,
              ci_upper_95 = mean + 1.96 * se,
              ci_lower_90 = mean - 1.645 * se,
              ci_upper_90 = mean + 1.645 * se)


## Figure A32: contact with politicians in 2002 
## (thin line: 95% CI, thick line: 90% CI)
ggplot(data = dat_sum_contact,
       mapping = aes(x = type_plot, y = mean)) +
    geom_point() +
    geom_linerange(aes(ymin = ci_lower_95, 
                       ymax = ci_upper_95)) +
    geom_linerange(aes(ymin = ci_lower_90, 
                       ymax = ci_upper_90),
                   size = 1.3) +
    geom_text(aes(label = paste0(100 * round(mean,2), "%")),
              nudge_x = 0.3, vjust = 0.5) +
    coord_flip() + 
    scale_y_continuous(limits = c(0, 0.3),
                       labels = scales::percent_format(accuracy = 1)) +
    guides(colour = guide_legend(reverse = TRUE),
           shape = guide_legend(reverse = TRUE)) +
    labs(y = "Percentage of respondents who contacted politician",
         x = NULL)
ggsave(filename = "fig_a32.pdf", width = 9, height = 2.5)


## now check whether one or more politicians called the respondent's home:
## keep the 2002 and 2007 waves, drop missing answers to v0001, and code 
## "yes" as 1 and every other answer as 0
ines_candidate_called <- ines_subset %>% 
    filter(ines %in% c(2002, 2007),
           !is.na(v0001)) %>% 
    select(candidate_called_home = v0001, ines, id) %>% 
    mutate(candidate_called_home = as.numeric(candidate_called_home == "yes"))


## share of respondents per wave, with standard errors and 95%/90% 
## confidence intervals
dat_sum_candidate_called <- ines_candidate_called %>%
    group_by(ines) %>%
    summarise(mean = mean(candidate_called_home),
              se = sd(candidate_called_home) / sqrt(n()),
              ci_lower_95 = mean - 1.96 * se,
              ci_upper_95 = mean + 1.96 * se,
              ci_lower_90 = mean - 1.645 * se,
              ci_upper_90 = mean + 1.645 * se)


## fix the level order of the waves for plotting
dat_sum_candidate_called <- dat_sum_candidate_called %>% 
    mutate(ines = factor(ines, levels = c("2007", "2002")))

## Figure A33 ----
## share of respondents reporting that a candidate called to their home, 
## by wave (thin line: 95% CI, thick line: 90% CI)
ggplot(data = dat_sum_candidate_called,
       mapping = aes(x = factor(ines), y = mean)) +
    geom_point() +
    geom_linerange(aes(ymin = ci_lower_95, 
                       ymax = ci_upper_95)) +
    geom_linerange(aes(ymin = ci_lower_90, 
                       ymax = ci_upper_90),
                   size = 1.3) +
    geom_text(aes(label = paste0(100 * round(mean,2), "%")),
              nudge_x = 0.3, vjust = 0.5) +
    coord_flip() +
    scale_y_continuous(limits = c(0, 1),
                       labels = scales::percent_format(accuracy = 1)) +
    guides(colour = guide_legend(reverse = TRUE),
           shape = guide_legend(reverse = TRUE)) +
    labs(y = "At least one candidate called to respondent's home",
         x = NULL)
ggsave(filename = "fig_a33.pdf", width = 9, height = 2.5)

## record when the script was run and the R/package versions in use
date()

sessionInfo()
